/*===================================================================
lagged.do
	*Started:	
	*Updated:	2021-08-13
	*Author(s):	Charlie Law
	*Purpose:	Creates person-level lagged outcomes for enrollment in HMIS
===================================================================*/


*Raw program enrollment data; merge to assessments
	* Load the "Programs" sheet of the HMIS export; the first row holds the variable names.
	* Forward slashes are used in the path (as in the merge below): Stata accepts them on
	* every platform, and they avoid the backslash's role as the macro-escape character.
	clear 
	import excel "$datadir/LEO PR-VISPDAT 5.3.2021.xlsx", sheet("Programs") firstrow clear
	rename ClientsUniqueIdentifier HMISID 

	* Keep one row per (person, enrollment start date, project type, program name).
	* -force- is needed because rows may still differ on variables outside this key.
	duplicates report HMISID EnrollmentsProjectStartDate ProgramsProjectTypeCode ProgramsFullName
	duplicates drop HMISID EnrollmentsProjectStartDate ProgramsProjectTypeCode ProgramsFullName, force 
	count /*3,541- number of enrollments */ 

	* Attach each person's assessment dates (the using file has one row per HMISID).
	merge m:1 HMISID using "$datadir/assmt.dta", keepusing(startdate first_assessment_dt) /*everyone enrolled in a program can be matched to an assessment, using file is the full assessment file*/ 

	* Drop enrollments with no assessment record. Assessment-only rows (_merge == 2) are
	* kept on purpose: they carry missing enrollment variables and remain in the data
	* as people with no recorded enrollment.
	drop if _merge == 1
	drop _merge
	
*Process enrollment file and define enrollment outcomes
	* Inspect the enrollment start date before deriving calendar components from it
	codebook EnrollmentsProjectStartDate

	* Calendar month, quarter and year of the enrollment start date
	generate mm_e = month(EnrollmentsProjectStartDate)
	generate q_e = quarter(EnrollmentsProjectStartDate)
	generate year_e = year(EnrollmentsProjectStartDate)
	order mm_e year_e, after(EnrollmentsProjectStartDate)

	* Running month index: 1 = July 2018, 13 = July 2019, 25 = July 2020
	generate month_e = (year_e - 2018)*12 + mm_e - 6
	label variable month_e "Month of Enrollment"
	* NOTE(review): the value label "months" is not defined in this section -- presumably
	* it is defined elsewhere; confirm, otherwise month_e displays as plain numbers.
	label values month_e months
	
	*----------------------------------------------------------------------------------------------------------------------
	
* Create lagged indicator for HMIS outcomes
		
	// Note: The earliest 'EnrollmentsProjectStartDate' is in 1998 & the earliest 'startdate' is in 2018, so we do have data for at least a year back for all people in the RCT. 
	
	* Days from the enrollment start to the person's RCT start date
	* (positive = the enrollment began before the RCT start date).
	gen diff = startdate - EnrollmentsProjectStartDate
	
	* Row-level flag: this enrollment started 1-365 days before the RCT start date.
	* A missing diff (no enrollment date or no startdate) yields 0, because Stata treats
	* missing as larger than any number and so "diff < 366" is false.
	gen priorengagementbinary = diff > 0 & diff < 366
	
	* Break out prior engagement by project type
	gen priorengagementbinary_hp = priorengagementbinary & ProgramsProjectTypeCode == "Homeless Prevention"
	gen priorengagementbinary_nothp = priorengagementbinary & ProgramsProjectTypeCode != "Homeless Prevention"
	
	* Collapse to one row per person:
	*   (sum) of the row flags = number of enrollments in the prior year (count vars)
	*   (max) of the row flags = 1 if the person had any such enrollment (binary vars)
	* Collapsing by HMISID alone guarantees exactly one row per person.
	collapse (sum) countofpriorengagement = priorengagementbinary ///
	               countofpriorengagement_hp = priorengagementbinary_hp ///
	               countofpriorengagement_nothp = priorengagementbinary_nothp ///
	         (max) priorengagementbinary priorengagementbinary_hp priorengagementbinary_nothp, ///
	         by(HMISID)
	
	* Variable labels, grouped by engagement type (count, then binary)
	
	* Any program
	label var countofpriorengagement "Prior Engagement, past 1 year--count"
	label var priorengagementbinary "Prior Engagement, past 1 year--binary"
	
	* Homeless Prevention programs
	label var countofpriorengagement_hp "Pr. Eng. with HP, past 1 year--count"
	label var priorengagementbinary_hp "Pr. Eng. with HP, past 1 year--binary"
	
	* All other programs
	label var countofpriorengagement_nothp "Pr. Eng. with Other, past 1 year--count"
	label var priorengagementbinary_nothp "Pr. Eng. with Other, past 1 year--binary"
	
	* Write the person-level file into the data directory.
	* Note: this also leaves $datadir as the working directory afterwards.
	cd "$datadir"
	save "lagged.dta", replace
	
	
	
	
	
	
	
	
	
	
	
	
	
	